/*--------------------------------------------------------------------------------
	DESCRIPTION: Cleaning relevant Swiss datasets

--------------------------------------------------------------------------------*/

cap erase "$data_dir/raw/swiss_wages_oww.dta"
cap erase "$data_dir/raw/swiss_cpi.dta"
cap erase "$data_dir/raw/income_life_experience.dta"

*****************************************************************
**SWISS WAGE DATA FROM OWW

clear
use "$data_dir/raw/oww3.dta"

tempfile exrt_data temp_data

rename y0 year
rename country_name country
rename country_code ccode

//keep USA-SWISS exchange rate
preserve
keep if ccode=="CHE"
collapse exrt_usd, by(ccode year)
drop if year>=1971 & year!=. //drop because have exchange rate data
gen exrt_franc = 1/exrt_usd
label var exrt_franc "Exchange rate (USD to francs)"
append using "$data_dir/raw/usd_franc_exrt.dta" //data from https://fred.stlouisfed.org/data/EXSZUS
save `exrt_data'
restore

//get relevant data
gen isco88_2digit = real(substr(isco88, 1, 2))

preserve
keep if ccode=="CHE"
collapse hw3wl_current, by(ccode year isco88_2digit) //take average across occupations by ISCO 2-digit code
save `temp_data'
restore

gen cme = 1 if country=="Austria" | country=="Germany" | country=="Netherlands" | ///
			   country=="Belgium " | country=="Denmark" | country=="Sweden" | ///
			   country=="Finland" | country=="Iceland" | country=="Norway" //CMEs from Hall and Soskice (2001), excluding Japan

keep if cme==1
collapse hw3wl_us, by(isco88_2digit year) //take average across countries and occupations by ISCO 2-digit code
gen ccode="CHE"
sort ccode year
merge m:1 ccode year using `exrt_data'
drop if _merge==2
drop _merge

replace hw3wl_us = hw3wl_us/exrt_franc //convert to nominal francs from USD

merge 1:1 ccode year isco88_2digit using `temp_data'

corr hw3wl_current hw3wl_us //check correlation where overlap between Swiss and average CMEs data

gen swiss_wages = hw3wl_current if _merge==3 //just use Swiss values where Swiss data available
replace swiss_wages = hw3wl_us if swiss_wages==. & hw3wl_current==. //now use the CME average for missing data for Switzerland
drop _merge

keep ccode year isco88_2digit swiss_wages
sort isco88_2digit year

gen swiss_wages_final = .
foreach i in 71 72 73 74 81 82 83 91 93 {
	lowess swiss_wages year if isco88_2digit==`i', gen(lowess_`i') nograph //do lowess for these occupations because systematic 1983 "jump"
	
	replace swiss_wages_final = lowess_`i' if isco88_2digit==`i'
}

replace swiss_wages_final = swiss_wages if swiss_wages_final==.

keep ccode year isco88_2digit swiss_wages_final

gen income = .
replace income = swiss_wages_final*2000 if year>=1953 & year<=1984 //before 1984 legal change to min. 4 weeks, assume 2 weeks off so 2000 working hours in a year
replace income = swiss_wages_final*1920 if year>=1985 & year!=. //before 1984 legal change to min. 4 weeks, so 1920 working hours in a year

keep year isco88_2digit income
sort isco88_2digit year

save "$data_dir/raw/swiss_wages_oww.dta", replace

*****************************************************************
**SWISS CPI DATA

clear
import excel "$data_dir/raw/swiss_cpi.xlsx", sheet("Index_y") cellrange(A4:C115) firstrow clear
rename DatumDate year
drop B
rename C cpi_1939
drop if cpi_1939==.
drop if year>2023

save "$data_dir/raw/swiss_cpi.dta", replace

*****************************************************************
**SWISS INCOME DATA

clear
use "$data_dir/raw/SHP0_bvwl_user.dta"

drop if bvwl001==-3 | bvwl001==-1
drop if bvwl002==-3 | bvwl002==-1

gen start = bvwl001
gen end = bvwl002
gen n_years = end - start + 1
drop if n_years<0 & n_years!=.
expand n_years

bysort idpers bvwl_idx (start): gen year = start + _n - 1
drop if year<1953 & year!=.

tostring bvwl004, generate(test)
replace test = "" if test=="-3" | test=="-8" | test=="-9"
gen isco88_2digit = real(substr(test, 1, 2))

merge m:1 isco88_2digit year using "$data_dir/raw/swiss_wages_oww.dta"
drop if _merge==2
drop _merge

replace income = income*0.25 if bvwl006==1 //part time job less than 50% (0.25 is median between 0 and 50)
replace income = income*0.7 if bvwl006==2 //part time job (50 - 89%) (0.7 is median between 50 and 90)

gen income_with_0 = income
replace income_with_0 = 0 if income==. & bvwl007>=1 & bvwl007<=8 //assume 0 if non-active

collapse income income_with_0, by(idpers year) //if multiple occupations within a year, we take average

merge 1:1 idpers year using "$data_dir/raw/imputed_income_pers_long_shp.dta", keepusing(iptotni)

xtset idpers year
tsfill

rename income income_oww

gen income_continuous = iptotni
replace income_continuous = income_oww if income_continuous==. //use actual reported income of respondent if overlap

gen income_continuous_with_0 = income_continuous
replace income_continuous_with_0 = 0 if income_with_0==0 & income_continuous_with_0==.

drop income_oww
drop _merge

xtset idpers year

merge m:1 year using "$data_dir/raw/swiss_cpi.dta"
drop if _merge==2
drop _merge

summ cpi_1939 if year==2011 //get 1939 CPI value for 2011
gen real_income = income_continuous * r(mean) / cpi_1939 //convert income to francs in 2011
gen real_income_with_0 = income_continuous_with_0 * r(mean) / cpi_1939 //convert income to francs in 2011

keep idpers year real_income real_income_with_0
xtset idpers year

gen ln_real_income = ln(real_income)

gen income_growth = d.ln_real_income

save "$data_dir/raw/income_life_experience.dta", replace
